library(modplots)
##
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
##
## The following objects are masked from 'package:base':
##
## intersect, t
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Mitochondrial genes
mt <- c("ENSGALG00000035334", #COX3
"ENSGALG00000032456", #COII
"ENSGALG00000032142", #MT-CO1
"ENSGALG00000032079", #MT-CYB
"ENSGALG00000037838", #ND6
"ENSGALG00000029500", #ND5
"ENSGALG00000036229", #MT-ND4
"ENSGALG00000042478", #ND4L
"ENSGALG00000030436", #ND3
"ENSGALG00000041091", #MT-ATP6
"ENSGALG00000032465", #MT-ATP8
"ENSGALG00000043768", #MT-ND2
"ENSGALG00000042750") #MT-ND1
# volcanoplot thresholds
p.adj <- 0.05
l2fc <- 0.5
# seurat object
my.se <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_seurat_250723.rds")
# cluster labels from B10int and L10int
ctrl_lumb_int_combined_labels <- readRDS("~/spinal_cord_paper/annotations/ctrl_lumb_int_combined_labels.rds")
identical(colnames(my.se), rownames(ctrl_lumb_int_combined_labels))
## [1] TRUE
my.se$annot_sample <- ctrl_lumb_int_combined_labels$annot_sample
my.se@active.assay <- "RNA"
We do DE analysis per cluster, contrasting B10 and L10 samples:
markers <- list()
numbers <- list()
composition <-list()
for (i in seq(levels(Idents(my.se)))) {
# subset for individual clusters
mn.se <- subset(x = my.se, idents = levels(Idents(my.se))[i])
mn.se$sample <- str_extract(mn.se$orig.ident, "ctrl|lumb")
composition[[i]] <- mn.se[[]] %>%
select(sample, annot_sample)
Idents(mn.se) <- "sample"
tmp_markers <- FindMarkers(mn.se,
ident.1 = "ctrl",
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.2,
latent.vars = c("CC.Difference.seurat"),
test.use = "MAST",
assay = "RNA")
# cell numbers per sample
numbers[[i]] <- data.frame(table(mn.se$sample))
tmp_markers <- tmp_markers %>%
rownames_to_column("Gene.stable.ID") %>%
left_join(gnames)
markers[[i]] <- tmp_markers
}
names(markers) <- paste0("cl-", levels(Idents(my.se)))
names(numbers) <- paste0("cl-", levels(Idents(my.se)))
names(composition) <- paste0("cl-", levels(Idents(my.se)))
# bind lists into data frames
lumb_markers <- bind_rows(markers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
lumb_numbers <- bind_rows(numbers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
lumb_composition <- bind_rows(composition, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
saveRDS(lumb_markers, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_markers.rds")
saveRDS(lumb_numbers, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_numbers.rds")
saveRDS(lumb_composition, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_composition.rds")
# load the DE data
lumb_markers <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_markers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
lumb_numbers <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_numbers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
lumb_composition <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_composition.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
Plot the cluster compositions and the number of marker genes.
DimPlot(my.se, label = TRUE, reduction = "tsne")
ggplot(data = lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 3) +
geom_text(nudge_y = 50, size = 3) +
ggtitle("Cluster composition by sample (nCells)")
ggplot(data = lumb_markers %>% filter(p_val_adj < 0.05 & avg_log2FC > 0.5), aes(x = cluster, group = cluster)) +
geom_bar() +
facet_wrap("cluster", nrow = 3,scales = "free_x") +
ggtitle("Number of sig. DE genes")
We select the clusters with the highest amount of neighbourhoods with DA of cells. Those are clusters 3 (neurons), 21, 22 (Glycogen body), and 30 motor neurons.
# vector of clusters
select_clusters <- c(3,21,22,30)
select_lumb_markers <- filter(lumb_markers, clust_id %in% select_clusters)
select_lumb_numbers <- filter(lumb_numbers, clust_id %in% select_clusters)
select_lumb_composition <- filter(lumb_composition, clust_id %in% select_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = select_lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- select_lumb_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}(?=trl|umb)")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BL_select_barplot <- ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("BL10int cluster contributions")
BL_select_barplot
pdf("~/spinal_cord_paper/figures/Fig_4_BL10int_high_DA_clust_contribution.pdf")
BL_select_barplot
dev.off()
## svg
## 2
toplot <- select_lumb_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "lumb",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
lumb = "#419c73",
ns = "grey")
shapes <- c(ctrl = 21,
lumb = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1) +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
volplot
# ggplotly(volplot, width = 1000, height = 600)
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1) +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes")
volplot_nomt
# ggplotly(volplot_nomt, width = 1000, height = 600)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 8233 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 208 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 534 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 52 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Cluster 30 (Consisting of 161 B10 vs 13 L10 cells) shows no MN related markers. Seemingly, those 13 cells are indeed motor neurons. This is supported by the fact of their expression of TUBB3, FOXP1, and SLC18A3.
# Motor neuron cluster
mn.se <- subset(x = my.se, idents = 30)
mn.se$sample <- str_extract(mn.se$orig.ident, "ctrl|lumb")
mn.se@active.assay <- "integrated"
mn_markers <- gnames[grepl("TUBB3|FOXP1|SLC18A3", gnames$Gene.name),]
mn_markers
VlnPlot(mn.se, group.by = "sample", mn_markers$Gene.stable.ID, cols = c("darkgrey", "#419c73"))
In contrast to the clusters above, we now select clusters 17, 18, and 19. The show a very low amount of DA neighbourhoods, are of similar size and represent neuronal, MFOL, and NPC populations.
# vector of MFOL clusters
low_clusters <- c(17, 18, 19)
low_lumb_markers <- filter(lumb_markers, clust_id %in% low_clusters)
low_lumb_numbers <- filter(lumb_numbers, clust_id %in% low_clusters)
low_lumb_composition <- filter(lumb_composition, clust_id %in% low_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = low_lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- low_lumb_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}(?=trl|umb)")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BL_low_barplot <- ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("From which clusters do cells come?")
BL_low_barplot
pdf("~/spinal_cord_paper/figures/Fig_4_BL10int_low_DA_clust_contribution.pdf")
BL_low_barplot
dev.off()
## svg
## 2
toplot <- low_lumb_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "lumb",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
lumb = "#419c73",
ns = "grey")
shapes <- c(ctrl = 21,
lumb = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
xlim(c(-3.5, 3.5)) +
facet_wrap("cluster", nrow = 1) +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
volplot
# ggplotly(volplot, width = 800, height = 500)
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
xlim(c(-1.5, 1.5)) +
facet_wrap("cluster", nrow = 1) +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes)")
volplot_nomt
# ggplotly(volplot_nomt, width = 800, height = 500)
p1 <- volplot +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
p2 <- volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
p1
## Warning: Removed 1289 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p2
## Warning: Removed 1264 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
pdf("~/spinal_cord_paper/figures/Fig_4_BL10int_low_DA_volplots.pdf", width = 8, height = 4)
p1
## Warning: Removed 1289 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p2
## Warning: Removed 1264 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
dev.off()
## svg
## 2
# Date and time of Rendering
Sys.time()
## [1] "2025-02-05 10:55:45 CET"
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Zurich
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.10.4 Seurat_5.1.0 SeuratObject_5.0.2 sp_2.1-4
## [5] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [9] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [13] ggplot2_3.5.1 tidyverse_2.0.0 modplots_1.0.0
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.4.1
## [3] later_1.4.1 polyclip_1.10-7
## [5] fastDummies_1.7.4 lifecycle_1.0.4
## [7] globals_0.16.3 lattice_0.22-6
## [9] MASS_7.3-61 magrittr_2.0.3
## [11] sass_0.4.9 rmarkdown_2.29
## [13] jquerylib_0.1.4 yaml_2.3.10
## [15] httpuv_1.6.15 sctransform_0.4.1
## [17] spam_2.11-0 spatstat.sparse_3.1-0
## [19] reticulate_1.40.0 cowplot_1.1.3
## [21] pbapply_1.7-2 DBI_1.2.3
## [23] RColorBrewer_1.1-3 abind_1.4-8
## [25] zlibbioc_1.52.0 Rtsne_0.17
## [27] GenomicRanges_1.58.0 BiocGenerics_0.52.0
## [29] GenomeInfoDbData_1.2.13 IRanges_2.40.1
## [31] S4Vectors_0.44.0 ggrepel_0.9.6
## [33] irlba_2.3.5.1 listenv_0.9.1
## [35] spatstat.utils_3.1-1 pheatmap_1.0.12
## [37] goftest_1.2-3 RSpectra_0.16-2
## [39] spatstat.random_3.3-2 fitdistrplus_1.2-1
## [41] parallelly_1.40.1 leiden_0.4.3.1
## [43] codetools_0.2-20 DelayedArray_0.32.0
## [45] tidyselect_1.2.1 UCSC.utils_1.2.0
## [47] farver_2.1.2 matrixStats_1.4.1
## [49] stats4_4.4.1 spatstat.explore_3.3-3
## [51] jsonlite_1.8.9 progressr_0.15.1
## [53] ggridges_0.5.6 survival_3.7-0
## [55] tools_4.4.1 ica_1.0-3
## [57] Rcpp_1.0.13-1 glue_1.8.0
## [59] gridExtra_2.3 SparseArray_1.6.0
## [61] xfun_0.49 MatrixGenerics_1.18.0
## [63] GenomeInfoDb_1.42.1 withr_3.0.2
## [65] fastmap_1.2.0 fansi_1.0.6
## [67] digest_0.6.37 timechange_0.3.0
## [69] R6_2.5.1 mime_0.12
## [71] colorspace_2.1-1 scattermore_1.2
## [73] tensor_1.5 spatstat.data_3.1-4
## [75] RSQLite_2.3.7 utf8_1.2.4
## [77] generics_0.1.3 data.table_1.16.4
## [79] httr_1.4.7 htmlwidgets_1.6.4
## [81] S4Arrays_1.6.0 uwot_0.2.2
## [83] pkgconfig_2.0.3 gtable_0.3.6
## [85] blob_1.2.4 lmtest_0.9-40
## [87] XVector_0.46.0 htmltools_0.5.8.1
## [89] dotCall64_1.2 org.Gg.eg.db_3.20.0
## [91] scales_1.3.0 Biobase_2.66.0
## [93] png_0.1-8 spatstat.univar_3.1-1
## [95] knitr_1.49 rstudioapi_0.16.0
## [97] tzdb_0.4.0 reshape2_1.4.4
## [99] nlme_3.1-165 cachem_1.1.0
## [101] zoo_1.8-12 KernSmooth_2.23-24
## [103] vipor_0.4.7 parallel_4.4.1
## [105] miniUI_0.1.1.1 AnnotationDbi_1.68.0
## [107] ggrastr_1.0.2 pillar_1.9.0
## [109] grid_4.4.1 vctrs_0.6.5
## [111] RANN_2.6.2 promises_1.3.2
## [113] xtable_1.8-4 cluster_2.1.6
## [115] beeswarm_0.4.0 evaluate_1.0.1
## [117] cli_3.6.3 compiler_4.4.1
## [119] rlang_1.1.4 crayon_1.5.3
## [121] future.apply_1.11.3 labeling_0.4.3
## [123] plyr_1.8.9 ggbeeswarm_0.7.2
## [125] stringi_1.8.4 viridisLite_0.4.2
## [127] deldir_2.0-4 munsell_0.5.1
## [129] Biostrings_2.74.0 lazyeval_0.2.2
## [131] spatstat.geom_3.3-4 Matrix_1.7-0
## [133] RcppHNSW_0.6.0 hms_1.1.3
## [135] patchwork_1.3.0 bit64_4.0.5
## [137] future_1.34.0 KEGGREST_1.46.0
## [139] shiny_1.10.0 SummarizedExperiment_1.36.0
## [141] ROCR_1.0-11 igraph_2.1.2
## [143] memoise_2.0.1 bslib_0.8.0
## [145] bit_4.0.5